
*************************************************************************************************
/*																								
						Purpose: Bring in 72-90 GSS cross sections and assign income scores		
						Creates: GSS_77to90_analysis.dta                 						
						Note: Will only keep '77-'90 cross sections because (1) '77 is the      
							   first year that the foreignborn variable is available, and        
							   (2) one of the two father occupation variables cleaned in this file   
							   (paocc16) is only available through 1990.					        
*/																								 
*************************************************************************************************

clear all
set more off
set maxvar 10000

* Load the GSS file; all relative paths below are resolved from the GSS data folder
cd "$Mydirectory1/1_DataSources/GSS/"

use ./input/GSS7218_R1.dta, clear //download from GSS website
sort year id

* Show the contents of the raw file and print the variable count that describe leaves in r(k)
describe
local r = `r(k)'
display "`r(k)'"

* Discard every cross section after 1990
drop if year>1990

*------------------------------------------------------------------------------*
*------------------------------------------------------------------------------*

*********************
** OCCUPATION CODES USING THE 1970 CODES
*********************

/* Convert the 1970 census occupations of paocc16 into the ~30 ANES occupations. 
   Do the same for respondent and spouse occupation.
 */

rename *, lower

foreach var of varlist occ spocc paocc16  {

	* Self-employment flag and final name of the coarsened occupation for this variable
	if "`var'"=="occ" {
		local selfemp wrkslf
		local coarse occR_70
	}
	else if "`var'"=="spocc" {
		local selfemp spwrkslf
		local coarse occSP_70
	}
	else if "`var'"=="paocc16" {
		local selfemp pawrkslf
		local coarse fatheroccej_70
	}

	* The crosswalk is keyed on census1970, so the occupation variable borrows that name for the merge
	rename `var' census1970
	replace census1970=. if census1970==.i | census1970==.a | census1970==.d | census1970==999

	* Crosswalk the 1970 occupations to the ANES occs (merge adds one variable: fatheroccej)
	merge m:1 census1970 using "../Crosswalks/Crosswalk_1970Census_toANES.dta"
	* Every unmatched master observation must be one with a missing occupation code
	assert census1970==. if _merge==1
	drop if _merge==2
	tabulate census1970 if _merge==1, missing
	drop _merge

	* Self-employed managers (currently coded as 28) are recoded to 21
	replace fatheroccej=21 if fatheroccej==28 & `selfemp'==1

	* Store the coarsened occupation under its final name and give the original variable its name back
	rename fatheroccej `coarse'
	rename census1970 `var'
}

label variable fatheroccej_70 "Father's occupation, coarsened"
label variable occSP_70 "Spouse occupation, coarsened"
label variable occR_70 "Resp. occupation, coarsened"


*------------------------------------------------------------------------------*
*------------------------------------------------------------------------------*
	
*********************
** OCCUPATIONS USING 2010 CODES 
*********************

/* Using a GSS-provided crosswalk, convert the 2010 census occupations of paocc10 
   into the ~30 ANES occupations. Do the same for respondent and spouse occupation.
*/

foreach var of varlist occ10 spocc10 paocc10   {

	* Self-employment flag and final name of the coarsened occupation for this variable
	if "`var'"=="occ10" {
		local selfemp wrkslf
		local coarse occR_10
	}
	else if "`var'"=="spocc10" {
		local selfemp spwrkslf
		local coarse occSP_10
	}
	else if "`var'"=="paocc10" {
		local selfemp pawrkslf
		local coarse fatheroccej10
	}

	* The crosswalk is keyed on census2010, so the occupation variable borrows that name for the merge
	rename `var' census2010
	replace census2010=. if census2010==.i | census2010==.n | census2010==.d  | census2010==9997

	* Crosswalk the 2010 occupations to the ANES occs (the coarsened code arrives as fatheroccej_2010)
	merge m:1 census2010 using "../Crosswalks/Crosswalk_2010Census_toANES.dta"
	tabulate census2010 if _merge==1, missing
	* Every unmatched master observation must be one with a missing occupation code
	assert census2010==. if _merge==1
	drop if _merge==2

	* Diagnostics: which census codes end up without an ANES occupation
	tabulate census2010 if fatheroccej_2010==., missing nolabel
	count if _merge==1 & fatheroccej_2010==.
	drop _merge

	* Self-employed managers (currently coded as 28) are recoded to 21
	replace fatheroccej_2010=21 if fatheroccej_2010==28 & `selfemp'==1

	* Store the coarsened occupation under its final name and give the original variable its name back
	rename fatheroccej_2010 `coarse'
	rename census2010 `var'
}

label variable fatheroccej10 "Father's occupation, coarsened"
label variable occSP_10 "Spouse occupation, coarsened"
label variable occR_10 "Resp. occupation, coarsened"
	
*------------------------------------------------------------------------------*	
*------------------------------------------------------------------------------*

*********************
** TOTAL FAMILY INCOME 
*********************
	
* Each income question is converted to dollars using the midpoint of its bracket.
* The lowest bracket is set to 0.75 times its upper bound and the open-ended top
* bracket to 1.25 times its lower bound. Every *_bins variable is blanked outside
* the survey years handled by its section.

* Midpoints for 1972 income variable (use "income72" to construct)
* Codes 1-12: <2k, 2-4k, 4-6k, 6-8k, 8-10k, 10-12.5k, 12.5-15k, 15-17.5k,
*             17.5-20k, 20-25k, 25-30k, >=30k
	local dollars72 1500 3000 5000 7000 9000 11250 13750 16250 18750 22500 27500 37500
	gen income72_bins=.
	forvalues code=1/12 {
		local midpoint : word `code' of `dollars72'
		replace income72_bins=`midpoint' if income72==`code'
	}
	replace income72_bins=. if year!=1972

* Midpoints of 1973-1976 income variable (use "income" to construct)
* Codes 1-12: <1k, 1-3k, 3-4k, 4-5k, 5-6k, 6-7k, 7-8k, 8-10k, 10-15k, 15-20k,
*             20-25k, >=25k
	local dollars73 750 2000 3500 4500 5500 6500 7500 9000 12500 17500 22500 31250
	gen income_bins=.
	forvalues code=1/12 {
		local midpoint : word `code' of `dollars73'
		replace income_bins=`midpoint' if income==`code'
	}
	replace income_bins=. if year>=1977

* Midpoints of 1977-1980 income variable (use "income77" to construct)
* From code 3 upward, pairs of adjacent codes are pooled into one wider bracket
	gen income77_bins=.
	replace income77_bins=0.75*1000 if income77==1        //<1000
	replace income77_bins=2000 if income77==2             //1-3k
	replace income77_bins=4000 if inlist(income77,3,4)    //3-5k
	replace income77_bins=6000 if inlist(income77,5,6)    //5-7k
	replace income77_bins=8500 if inlist(income77,7,8)    //7-10k
	replace income77_bins=12500 if inlist(income77,9,10)  //10-15k
	replace income77_bins=17500 if inlist(income77,11,12) //15-20k
	replace income77_bins=22500 if inlist(income77,13,14) //20-25k
	replace income77_bins=37500 if income77==15           //25-50k
	replace income77_bins=1.25*50000 if income77==16      //50k and above
	replace income77_bins=. if !inrange(year,1977,1980)

* Midpoints of 1982-1985 income variable (use "income82" to construct)
	gen income82_bins=.
	replace income82_bins=0.75*1000 if income82==1        //<1000
	replace income82_bins=2000 if income82==2             //1-3k
	replace income82_bins=4000 if inlist(income82,3,4)    //3-5k
	replace income82_bins=6000 if inlist(income82,5,6)    //5-7k
	replace income82_bins=8500 if inlist(income82,7,8)    //7-10k
	replace income82_bins=12500 if inlist(income82,9,10)  //10-15k
	replace income82_bins=17500 if inlist(income82,11,12) //15-20k
	replace income82_bins=22500 if inlist(income82,13,14) //20-25k
	replace income82_bins=30000 if income82==15           //25-35k
	replace income82_bins=42500 if income82==16           //35-50k
	replace income82_bins=1.25*50000 if income82==17      //50k and above
	replace income82_bins=. if !inrange(year,1982,1985)

* Midpoints of 1986-1990 income variable (use "income86" to construct)
	gen income86_bins=.
	replace income86_bins=0.75*4000 if inrange(income86,1,3) //<4k
	replace income86_bins=5000 if inlist(income86,4,5)       //4-6k
	replace income86_bins=7000 if inlist(income86,6,7)       //6-8k
	replace income86_bins=10250 if inlist(income86,8,9)      //8-12.5k
	replace income86_bins=15000 if inlist(income86,10,11)    //12.5-17.5k
	replace income86_bins=20000 if inlist(income86,12,13)    //17.5-22.5k
	replace income86_bins=26250 if inlist(income86,14,15)    //22.5-30k
	replace income86_bins=35000 if inlist(income86,16,17)    //30-40k
	replace income86_bins=50000 if inlist(income86,18,19)    //40-60k
	replace income86_bins=1.25*60000 if income86==20         //60k and above
	replace income86_bins=. if year<1986 
	
/* Construct *one* family income variable by blending binned 
   income variables from previous steps. Each survey-year window takes its
   value from the midpoint variable built for the income question asked in
   those years, so faminc is always in dollars. */
	gen faminc=.
	replace faminc=income72_bins if year==1972
	* 1973-1976: use the dollar midpoints (income_bins), not the raw 1-12 bracket code stored in "income"
	replace faminc=income_bins if year>1972 & year<1977
	replace faminc=income77_bins if year>=1977 & year<=1980
	replace faminc=income82_bins if year>=1982 & year<=1985
	replace faminc=income86_bins if year>=1986 & year<=1990
	label var faminc "Family income, using bins"

/*
	Note: The suffix "_son" of the following variables is used to match the 
	      variable names in other datasets. All respondents (i.e., male and female) 
	      are given a value for these variables. 
*/
	* Survey-year windows, one per income question
	local era72 (year==1972)
	local era73 (year>1972 & year<1977)
	local era77 (year>=1977 & year<=1980)
	local era82 (year>=1982 & year<=1985)
	local era86 (year>=1986 & year<=1990)

	* Bottom-code flag: 1 when the binned income equals the value assigned to the
	* lowest bracket of that window's income question, 0 for any other non-missing
	* bin, and missing when the bin itself is missing. The tabulations are checks only.
	gen bottomcoded_son =.

	replace bottomcoded_son= (income72_bins==0.75*2000) if `era72' & !missing(income72_bins)
	tabulate bottomcoded_son if `era72', missing
	tabulate income72_bins, missing

	replace bottomcoded_son= (income_bins==0.75*1000) if `era73' & !missing(income_bins)
	tabulate bottomcoded_son if `era73', missing
	tabulate bottomcoded_son, missing
	tabulate income_bins, missing

	replace bottomcoded_son= (income77_bins==0.75*1000) if `era77' & !missing(income77_bins)
	tabulate bottomcoded_son if `era77', missing
	tabulate bottomcoded_son, missing
	tabulate income77_bins, missing

	replace bottomcoded_son= (income82_bins==0.75*1000) if `era82' & !missing(income82_bins)
	tabulate bottomcoded_son if `era82', missing
	tabulate bottomcoded_son, missing
	tabulate income82_bins, missing

	replace bottomcoded_son= (income86_bins==0.75*4000) if `era86' & !missing(income86_bins)
	tabulate bottomcoded_son if `era86', missing
	tabulate bottomcoded_son, missing
	tabulate income86_bins, missing

	label variable bottomcoded_son "Respondent income bottom coded"


	* Top-code flag: same construction, comparing against the value assigned to the
	* open-ended top bracket of each income question.
	gen topcoded_son =.

	replace topcoded_son= (income72_bins==1.25*30000) if `era72' & !missing(income72_bins)
	tabulate topcoded_son if `era72', missing
	tabulate income72_bins, missing
	tabulate topcoded_son, missing

	replace topcoded_son= (income_bins==1.25*25000) if `era73' & !missing(income_bins)
	tabulate topcoded_son if `era73', missing
	tabulate income_bins, missing
	tabulate topcoded_son, missing

	replace topcoded_son= (income77_bins==1.25*50000) if `era77' & !missing(income77_bins)
	tabulate topcoded_son if `era77', missing
	tabulate income77_bins, missing
	tabulate topcoded_son, missing

	replace topcoded_son= (income82_bins==1.25*50000) if `era82' & !missing(income82_bins)
	tabulate topcoded_son if `era82', missing
	tabulate income82_bins, missing
	tabulate topcoded_son, missing

	replace topcoded_son= (income86_bins==1.25*60000) if `era86' & !missing(income86_bins)
	tabulate topcoded_son if `era86', missing
	tabulate income86_bins, missing
	tabulate topcoded_son, missing

	label variable topcoded_son "Respondent income top coded"

* Turn fam_inc into 1950 dollars using the CPI: https://data.bls.gov/timeseries/CUUR0000SA0 
* The deflator is matched on the calendar year before the survey year (year_CPI = year-1);
* observations without a matching CPI year are discarded.
	gen year_CPI = year-1
	merge m:1 year_CPI using "../CPI/CPI_deflator.dta"
	drop if _merge!=3
	drop _merge

* Real family income and its natural log
	generate fam_inc_real = faminc * deflator
	label variable fam_inc_real "Family income (bins), in 1950 dollars"

	generate lnfaminc = ln(fam_inc_real)
	label variable lnfaminc "Logged family income (bins)"

*------------------------------------------------------------------------------*
*------------------------------------------------------------------------------*
	
* Analysis sample: respondents aged 30 to 50 in cross sections from 1977 onward
	keep if inrange(age,30,50)
	drop if year<1977

* keep relevant variables: identifiers, age, and everything created in this file
* (occR_70 through lnfaminc), minus the CPI helpers and the intermediate income bins
	keep year id occR_70- lnfaminc age
	drop CPI* year_CPI income*_bins

* Shrink storage types and write the analysis file
	compress
	save ./output/GSS_77to90_analysis.dta, replace
